###############################
# BCCN_ReplicationRun.R
# Run file to replicate
#   Box-Steffensmeier et. al 
#   analysis
# Author: B Campbell
###############################

#######################################################################################
# Prepare data 
#######################################################################################
options(stringsAsFactors = FALSE) 
set.seed(1234)
require(statnet)

# Specify the K parameter, the number of nodes out from the ego to include.
K <- 3

# Specify the minimize size of an egonetwork to consider, this parameter is probably irrelevant given that none of our ego nets are smaller than 30.  
MINSIZE <- 30

# Specify the G parameter, or the number of groups or egonetwork types.  
G <- 3

START="READ"

# Then, load the data.  Will need to change wd and location of ac-net-env-case.RData obviously
setwd("/Users/benjamincampbell/Dropbox/int group environment cases/data and code/EGO_ERGM_Model_Code/Model_Scripts/ReplicationArchive")
load("ac-net-env-case.RData") # imports a network object "net"

pdf("Figure2.pdf")
plot(net)
dev.off()

# Recoding variables - here I want to recode into 6 categories based upon standardization and giving NA's their own bin
budgetNew <- rep(0, times = length(net %v% "budget"))
budget <- net %v% "budget"

for(i in 1:length((budget))){
  if(is.na((budget)[[i]]) == TRUE){
    budgetNew[[i]] <- 0
  }
  else if(0 < (budget)[[i]] && (budget)[[i]] < (mean(budget, na.rm = TRUE) - 2*sd(budget, na.rm = TRUE))){
    budgetNew[[i]] <- 1
  }
  else if((mean(budget, na.rm = TRUE) -2*sd(budget, na.rm = TRUE)) < (budget)[[i]] && (budget)[[i]] < (mean(budget, na.rm = TRUE) -1*sd(budget, na.rm = TRUE))){
    budgetNew[[i]] <- 2
  }
  else if((mean(budget, na.rm = TRUE) -1*sd(budget, na.rm = TRUE)) < (budget)[[i]] && (budget)[[i]] < mean(budget, na.rm = TRUE)){
    budgetNew[[i]] <- 3
  }
  else if(mean(budget, na.rm = TRUE) < (budget)[[i]] && (budget)[[i]] < (mean(budget, na.rm = TRUE) + 1*sd(budget, na.rm = TRUE))){
    budgetNew[[i]] <- 4
  }
  else if ((mean(budget, na.rm = TRUE) + 1*sd(budget, na.rm = TRUE)) < (budget)[[i]] && (budget)[[i]] < (mean(budget, na.rm = TRUE) + 2*sd(budget, na.rm = TRUE))){
    budgetNew[[i]] <- 5
  }
  else budgetNew[[i]] <- 6
}
table(budgetNew)
net %v% "budgetNew" <- budgetNew

plantSizeNew <- rep(0, times = length(net %v% "plantSize"))
plantSize <- net %v% "plantSize"

for(i in 1:length((plantSize))){
  if(is.na((plantSize)[[i]]) == TRUE){
    plantSizeNew[[i]] <- 0
  }
  else if(0 < (plantSize)[[i]] && (plantSize)[[i]] < (mean(plantSize, na.rm = TRUE) - 2*sd(plantSize, na.rm = TRUE))){
    plantSizeNew[[i]] <- 1
  }
  else if((mean(plantSize, na.rm = TRUE) -2*sd(plantSize, na.rm = TRUE)) < (plantSize)[[i]] && (plantSize)[[i]] < (mean(plantSize, na.rm = TRUE) -1*sd(plantSize, na.rm = TRUE))){
    plantSizeNew[[i]] <- 2
  }
  else if((mean(plantSize, na.rm = TRUE) -1*sd(plantSize, na.rm = TRUE)) < (plantSize)[[i]] && (plantSize)[[i]] < mean(plantSize, na.rm = TRUE)){
    plantSizeNew[[i]] <- 3
  }
  else if(mean(plantSize, na.rm = TRUE) < (plantSize)[[i]] && (plantSize)[[i]] < (mean(plantSize, na.rm = TRUE) + 1*sd(plantSize, na.rm = TRUE))){
    plantSizeNew[[i]] <- 4
  }
  else if ((mean(plantSize, na.rm = TRUE) + 1*sd(plantSize, na.rm = TRUE)) < (plantSize)[[i]] && (plantSize)[[i]] < (mean(plantSize, na.rm = TRUE) + 2*sd(plantSize, na.rm = TRUE))){
    plantSizeNew[[i]] <- 5
  }
  else plantSizeNew[[i]] <- 6
}
table(plantSizeNew)
net %v% "plantSizeNew" <- plantSizeNew

staffNew <- rep(0, times = length(net %v% "staff"))
staff <- net %v% "staff"

for(i in 1:length((staff))){
  if(is.na((staff)[[i]]) == TRUE){
    staffNew[[i]] <- 0
  }
  else if(0 < (staff)[[i]] && (staff)[[i]] < (mean(staff, na.rm = TRUE) - 2*sd(staff, na.rm = TRUE))){
    staffNew[[i]] <- 1
  }
  else if((mean(staff, na.rm = TRUE) -2*sd(staff, na.rm = TRUE)) < (staff)[[i]] && (staff)[[i]] < (mean(staff, na.rm = TRUE) -1*sd(staff, na.rm = TRUE))){
    staffNew[[i]] <- 2
  }
  else if((mean(staff, na.rm = TRUE) -1*sd(staff, na.rm = TRUE)) < (staff)[[i]] && (staff)[[i]] < mean(staff, na.rm = TRUE)){
    staffNew[[i]] <- 3
  }
  else if(mean(staff, na.rm = TRUE) < (staff)[[i]] && (staff)[[i]] < (mean(staff, na.rm = TRUE) + 1*sd(staff, na.rm = TRUE))){
    staffNew[[i]] <- 4
  }
  else if ((mean(staff, na.rm = TRUE) + 1*sd(staff, na.rm = TRUE)) < (staff)[[i]] && (staff)[[i]] < (mean(staff, na.rm = TRUE) + 2*sd(staff, na.rm = TRUE))){
    staffNew[[i]] <- 5
  }
  else staffNew[[i]] <- 6
}
table(staffNew)
net %v% "staffNew" <- staffNew

membersNew <- rep(0, times = length(net %v% "members"))
members <- net %v% "members"

for(i in 1:length((members))){
  if(is.na((members)[[i]]) == TRUE){
    membersNew[[i]] <- 0
  }
  else if(0 < (members)[[i]] && (members)[[i]] < (mean(members, na.rm = TRUE) - 2*sd(members, na.rm = TRUE))){
    membersNew[[i]] <- 1
  }
  else if((mean(members, na.rm = TRUE) -2*sd(members, na.rm = TRUE)) < (members)[[i]] && (members)[[i]] < (mean(members, na.rm = TRUE) -1*sd(members, na.rm = TRUE))){
    membersNew[[i]] <- 2
  }
  else if((mean(members, na.rm = TRUE) -1*sd(members, na.rm = TRUE)) < (members)[[i]] && (members)[[i]] < mean(members, na.rm = TRUE)){
    membersNew[[i]] <- 3
  }
  else if(mean(members, na.rm = TRUE) < (members)[[i]] && (members)[[i]] < (mean(members, na.rm = TRUE) + 1*sd(members, na.rm = TRUE))){
    membersNew[[i]] <- 4
  }
  else if ((mean(members, na.rm = TRUE) + 1*sd(members, na.rm = TRUE)) < (members)[[i]] && (members)[[i]] < (mean(members, na.rm = TRUE) + 2*sd(members, na.rm = TRUE))){
    membersNew[[i]] <- 5
  }
  else membersNew[[i]] <- 6
}
table(membersNew)
net %v% "membersNew" <- membersNew

salesAllNew <- rep(0, times = length(net %v% "salesAll"))
salesAll <- net %v% "salesAll"

for(i in 1:length((salesAll))){
  if(is.na((salesAll)[[i]]) == TRUE){
    salesAllNew[[i]] <- 0
  }
  else if(0 < (salesAll)[[i]] && (salesAll)[[i]] < (mean(salesAll, na.rm = TRUE) - 2*sd(salesAll, na.rm = TRUE))){
    salesAllNew[[i]] <- 1
  }
  else if((mean(salesAll, na.rm = TRUE) -2*sd(salesAll, na.rm = TRUE)) < (salesAll)[[i]] && (salesAll)[[i]] < (mean(salesAll, na.rm = TRUE) -1*sd(salesAll, na.rm = TRUE))){
    salesAllNew[[i]] <- 2
  }
  else if((mean(salesAll, na.rm = TRUE) -1*sd(salesAll, na.rm = TRUE)) < (salesAll)[[i]] && (salesAll)[[i]] < mean(salesAll, na.rm = TRUE)){
    salesAllNew[[i]] <- 3
  }
  else if(mean(salesAll, na.rm = TRUE) < (salesAll)[[i]] && (salesAll)[[i]] < (mean(salesAll, na.rm = TRUE) + 1*sd(salesAll, na.rm = TRUE))){
    salesAllNew[[i]] <- 4
  }
  else if ((mean(salesAll, na.rm = TRUE) + 1*sd(salesAll, na.rm = TRUE)) < (salesAll)[[i]] && (salesAll)[[i]] < (mean(salesAll, na.rm = TRUE) + 2*sd(salesAll, na.rm = TRUE))){
    salesAllNew[[i]] <- 5
  }
  else salesAllNew[[i]] <- 6
}
table(salesAllNew)
net %v% "salesAllNew" <- salesAllNew

currentEmployeesNew <- rep(0, times = length(net %v% "currentEmployees"))
currentEmployees <- net %v% "currentEmployees"

for(i in 1:length((currentEmployees))){
  if(is.na((currentEmployees)[[i]]) == TRUE){
    currentEmployeesNew[[i]] <- 0
  }
  else if(0 < (currentEmployees)[[i]] && (currentEmployees)[[i]] < (mean(currentEmployees, na.rm = TRUE) - 2*sd(currentEmployees, na.rm = TRUE))){
    currentEmployeesNew[[i]] <- 1
  }
  else if((mean(currentEmployees, na.rm = TRUE) -2*sd(currentEmployees, na.rm = TRUE)) < (currentEmployees)[[i]] && (currentEmployees)[[i]] < (mean(currentEmployees, na.rm = TRUE) -1*sd(currentEmployees, na.rm = TRUE))){
    currentEmployeesNew[[i]] <- 2
  }
  else if((mean(currentEmployees, na.rm = TRUE) -1*sd(currentEmployees, na.rm = TRUE)) < (currentEmployees)[[i]] && (currentEmployees)[[i]] < mean(currentEmployees, na.rm = TRUE)){
    currentEmployeesNew[[i]] <- 3
  }
  else if(mean(currentEmployees, na.rm = TRUE) < (currentEmployees)[[i]] && (currentEmployees)[[i]] < (mean(currentEmployees, na.rm = TRUE) + 1*sd(currentEmployees, na.rm = TRUE))){
    currentEmployeesNew[[i]] <- 4
  }
  else if ((mean(currentEmployees, na.rm = TRUE) + 1*sd(currentEmployees, na.rm = TRUE)) < (currentEmployees)[[i]] && (currentEmployees)[[i]] < (mean(currentEmployees, na.rm = TRUE) + 2*sd(currentEmployees, na.rm = TRUE))){
    currentEmployeesNew[[i]] <- 5
  }
  else currentEmployeesNew[[i]] <- 6
}
table(currentEmployeesNew)
net %v% "currentEmployeesNew" <- currentEmployeesNew


total_df <- data.frame(
  vertex.names = as.character(get.vertex.attribute(net, "vertex.names")),
  budget = get.vertex.attribute(net, "budgetNew"),
  currentEmployees = get.vertex.attribute(net, "currentEmployeesNew"),
  salesAll = get.vertex.attribute(net, "salesAll"),
  plantSize = get.vertex.attribute(net, "plantSizeNew"),
  members = get.vertex.attribute(net, "membersNew"),
  degreeCentrality = degree(net),
  staff = get.vertex.attribute(net, "staffNew"),
  eigenCentralityOld = get.vertex.attribute(net, "eigenCentrality"),
  eigenCentrality = evcent(net),
  betweennessCentrality = betweenness(net, cmode = "endpoints")
)

# Pull from other script files that include the functions we use here. 
source("start_model.R")
source("likelihoods_model.R")

# Append vertex attributes
library(plyr)
for(i in 1:length(x)){
  tempegonet <- x[[i]]
  vertices <- data.frame(vertex.names = get.vertex.attribute(tempegonet, "vertex.names"))
  reduced_df <- total_df[total_df$vertex.names %in% vertices$vertex.names,]
  keys <- join.keys(reduced_df, vertices, by = c("vertex.names"))
  matches<-match(keys$y,keys$x,nomatch=(keys$n+1))
  reduced_df <-  reduced_df[order(matches),]
  set.vertex.attribute(x = tempegonet, attrname = "budget", value = reduced_df$budget)
  set.vertex.attribute(x = tempegonet, attrname = "currentEmployees", value = reduced_df$currentEmployees)
  set.vertex.attribute(x = tempegonet, attrname = "salesAll", value = reduced_df$salesAll)
  set.vertex.attribute(x = tempegonet, attrname = "plantSize", value = reduced_df$plantSize)
  set.vertex.attribute(x = tempegonet, attrname = "members", value = reduced_df$members)
  set.vertex.attribute(x = tempegonet, attrname = "degreeCentrality", value = reduced_df$degreeCentrality)
  set.vertex.attribute(x = tempegonet, attrname = "staff", value = reduced_df$staff)
  set.vertex.attribute(x = tempegonet, attrname = "eigenCentrality", value = reduced_df$eigenCentrality)
  set.vertex.attribute(x = tempegonet, attrname = "betweennessCentrality", value = reduced_df$betweennessCentrality)
  x[[i]] <- tempegonet
}

##################################################################################
# Run ego-ERGM
##################################################################################

# specify the model
ego.terms <- c("edges", "concurrentties", "nodecov('members')", "nodecov('budget')", "nodecov('staff')", "nodecov('degreeCentrality')")

# Establish the offset term that adjusts for network size
ergm.offset <- rep(0,N)

# For every network, get an offset term to adjust for network size
for (i in 1:N)
  ergm.offset[i]<- -log(network.size(x[[i]]))

# Pull functions from script files
source("initialise_model.R")
cat(date(), "starting intitialisation\n")

# Conduct the two step initialization of the model
init<-init.egoergm(ego.terms, G) 

cat(date(), "finished intitialisation\n")

# Specify the maximum number of iterations for the EM algorithm to go through
STEPS <- 500
cat(date(), "starting mixture model\n")

# Call the function to perform the EM
source("terms_ego_ergm_model.R")
source("mixtures_of_ergms_model.R") # function to perform EM.  
out<-fit.mix.egoergm(ego.terms,init,obs.S,G) # run the EM algorithm. 


######################################################################
# Interpret results
######################################################################
# Table 1: Pull the results from EM and store them 
lambda<-out$lambda # store the mixture membership results of the EM algorithm
group.theta<-out$theta # store the parameter results of the EM algorithm.  Column is parameter, row is group
EE.BIC<-out$EE.BIC # store the BIC results of the EM algorithm


# Function to store lambda (intercept results) in a 3 by 1 matrix.
lambdaMatrix <- matrix(c(mean(lambda[,1]), mean(lambda[,2]), mean(lambda[,3])), nrow=3, ncol=1)
table_1 <- list(lambdaMatrix, group.theta, EE.BIC)
sink("Table1.txt")
table_1
sink()

# Create network lists by group to get groups by role
lambda_df <- as.data.frame(out$lambda)
colnames(lambda_df) <- c("Role_1", "Role_2", "Role_3")

lambda_df$role_assignment <- NA

for(i in 1:nrow(lambda_df)){
  role_assign <- which.max(out$lambda[i,])
  lambda_df$role_assignment[i] <- role_assign
}

g1_indices <- which(lambda_df$role_assignment == 1)
g2_indices <- which(lambda_df$role_assignment == 2)
g3_indices <- which(lambda_df$role_assignment == 3)

g1_nets <- x[g1_indices]
g2_nets <- x[g2_indices]
g3_nets <- x[g3_indices]

# code to plot the entire network using Fruchterman-Reingold layout with node colouration according to egoERGM results
par(mar=rep(0,4))

drawpie <- function(center,radius,probs,n=50,cols=1:length(probs))
{
  x <- c(0,cumsum(probs)/sum(probs))
  dx <- diff(x)
  np <- length(probs)
  for (i in 1:np)
  {
    t2p <- 2 * pi * seq(x[i], x[i + 1], length = n)
    xc <- center[1] + c(cos(t2p), 0) * radius
    yc <- center[2] + c(sin(t2p), 0) * radius
    polygon(xc, yc, border = TRUE, col = cols[i])
  }
}  


col.vec<-apply(lambda,1,which.max)
col.sides<-col.vec; col.sides[col.vec==1]<-3; col.sides[col.vec==2]<-4; col.sides[col.vec==3]<-5; col.sides[col.vec==4]<-50
col.sides[col.vec==5]<-6

# plot role assignments
pdf("Figure4.pdf")
coords<-plot(net,vertex.col=col.vec, layout.par=list(niter=1e3))
dev.off()

# Now, extract role assignments
roles <- col.vec

# So, these each correspond to the vertex name in the object
vertex <- net %v% 'vertex.names'
properName <- net %v% 'properName'
budget <- net %v% "budgetNew"
staff <- net %v% "staffNew" 
members <- net %v% "membersNew"
employees <- net %v% "currentEmployeesNew"
degree <- net %v% 'degreeCentrality'

rolesOut <- cbind(roles, vertex, properName, budget, staff, members, employees, degree)

#write.csv(rolesOut, "rolesOut_Covars.csv")

rolesOut <- as.data.frame(rolesOut)

# Table 2: get means by variable
sink("Table2.txt")
role1 <- rolesOut[which(rolesOut$roles == 1),]
cat("Variable Means Per Role")
cat("Teammates Budget")
mean(as.numeric(role1$budget))
cat("Teammates Staff")
mean(as.numeric(role1$staff))
cat("Teammates Members")
mean(as.numeric(role1$members))
cat("Teammates Degree")
mean(as.numeric(role1$degree))

role2 <- rolesOut[which(rolesOut$roles == 2),]
cat("Coordinators Budget")
mean(as.numeric(role2$budget))
cat("Coordinators Staff")
mean(as.numeric(role2$staff))
cat("Coordinators Members")
mean(as.numeric(role2$members))
cat("Coordinators Degree")
mean(as.numeric(role2$degree))

# get edgelist
role3 <- rolesOut[which(rolesOut$roles == 3),]
cat("Peripheral Specialists Budget")
mean(as.numeric(role3$budget))
cat("Peripheral Specialists Staff")
mean(as.numeric(role3$staff))
cat("Peripheral Specialists Members")
mean(as.numeric(role3$members))
cat("Peripheral Specialists Degree")
mean(as.numeric(role3$degree))
sink()

write.csv(role1$properName, file = "Table4_Teammates.csv")
write.csv(role2$properName, file = "Table5_Coordinators.csv")
write.csv(role3$properName, file = "Table6_PeripheralSpecialists.csv")

save.image("BCCN_ReplcatedResults.RData")

####################################################################
# Goodness of fit -- This repeats the process for each term
####################################################################

########################
# Spec to concurrentties
########################
set.seed(1234)

# specify the model
ego.terms <- c("edges", 
               #  "concurrentties", 
               "nodecov('members')", 
               "nodecov('budget')",
               "nodecov('staff')", 
               "nodecov('degreeCentrality')")


# Establish the offset term that adjusts for network size
ergm.offset <- rep(0,N)

# For every network, get an offset term to adjust for network size
for (i in 1:N)
  ergm.offset[i]<- -log(network.size(x[[i]]))

# Pull functions from script files
source("initialise_model.R")
cat(date(), "starting intitialisation\n")

# Specify ergm terms for each role - these are predictors for if an actor takes on a certain role.  
# This returns covariates for three groups
G <- 3
init<-init.egoergm(ego.terms, G) 
# init

cat(date(), "finished intitialisation\n")

# Specify the maximum number of iterations for the EM algorithm to go through
STEPS <- 500
cat(date(), "starting mixture model\n")

# Call the function to perform the EM
source("terms_ego_ergm_model.R")
source("mixtures_of_ergms_model.R") # function to perform EM.  
out<-fit.mix.egoergm(ego.terms,init,obs.S,G) # run the EM algorithm. 

# Pull the results from EM and store them
lambda<-out$lambda # store the mixture membership results of the EM algorithm
group.theta<-out$theta # store the parameter results of the EM algorithm.  Column is parameter, row is group
EE.BIC<-out$EE.BIC # store the BIC results of the EM algorithm

# Function to store lambda (intercept results) in a 3 by 1 matrix.
lambdaMatrix <- matrix(c(mean(lambda[,1]), mean(lambda[,2]), mean(lambda[,3])), nrow=3, ncol=1)
lambdaMatrix

# Create network lists by group
lambda_df <- as.data.frame(out$lambda)
colnames(lambda_df) <- c("Role_1", "Role_2", "Role_3")

lambda_df$role_assignment <- NA

for(i in 1:nrow(lambda_df)){
  role_assign <- which.max(out$lambda[i,])
  lambda_df$role_assignment[i] <- role_assign
}

g1_indices <- which(lambda_df$role_assignment == 1)
g2_indices <- which(lambda_df$role_assignment == 2)
g3_indices <- which(lambda_df$role_assignment == 3)

g1_nets <- x[g1_indices]
g2_nets <- x[g2_indices]
g3_nets <- x[g3_indices]

# code to plot the entire network using Fruchterman-Reingold layout with node colouration according to egoERGM results
par(mar=rep(0,4))

col.vec<-apply(lambda,1,which.max)
col.sides<-col.vec; col.sides[col.vec==1]<-3; col.sides[col.vec==2]<-4; col.sides[col.vec==3]<-5; col.sides[col.vec==4]<-50
col.sides[col.vec==5]<-6

coords<-plot(net,vertex.col=col.vec, layout.par=list(niter=1e3))


# Now, extract role assignments
roles <- col.vec

# So, these each correspond to the vertex name in the object
vertex <- net %v% 'vertex.names'
properName <- net %v% 'properName'
budget <- net %v% "budgetNew"
staff <- net %v% "staffNew" 
members <- net %v% "membersNew"
employees <- net %v% "currentEmployeesNew"
degree <- net %v% 'degreeCentrality'

rolesOut <- cbind(roles, vertex, properName, budget, staff, members, employees, degree)

#write.csv(rolesOut, "rolesOut_noCT_Covars.csv")

rolesOut <- as.data.frame(rolesOut)
# get means by variable
role1 <- rolesOut[which(rolesOut$roles == 1),]
mean(as.numeric(role1$budget))
mean(as.numeric(role1$staff))
mean(as.numeric(role1$members))
mean(as.numeric(role1$degree))

role2 <- rolesOut[which(rolesOut$roles == 2),]
mean(as.numeric(role2$budget))
mean(as.numeric(role2$staff))
mean(as.numeric(role2$members))
mean(as.numeric(role2$degree))

# get edgelist
role3 <- rolesOut[which(rolesOut$roles == 3),]
mean(as.numeric(role3$budget))
mean(as.numeric(role3$staff))
mean(as.numeric(role3$members))
mean(as.numeric(role3$degree))


# write.csv(as.edgelist(net), "edgelist_noCT.csv")

save.image("ModelPresented_noCT.RData")

load("ModelPresented_noCT.RData")
noCT_mat <- lambdaMatrix

#######################
# Spec to members
#######################
set.seed(1234)

# specify the model
ego.terms <- c("edges", 
               "concurrentties", 
               #"nodecov('members')", 
               "nodecov('budget')",
               "nodecov('staff')", 
               "nodecov('degreeCentrality')")

#################################

# Establish the offset term that adjusts for network size
ergm.offset <- rep(0,N)

# For every network, get an offset term to adjust for network size
for (i in 1:N)
  ergm.offset[i]<- -log(network.size(x[[i]]))

# Pull functions from script files
source("initialise_model.R")
cat(date(), "starting intitialisation\n")

# Specify ergm terms for each role - these are predictors for if an actor takes on a certain role.  
# This returns covariates for three groups
G <- 3
init<-init.egoergm(ego.terms, G) 
# init

cat(date(), "finished intitialisation\n")

# Specify the maximum number of iterations for the EM algorithm to go through
STEPS <- 500
cat(date(), "starting mixture model\n")

# Call the function to perform the EM
source("terms_ego_ergm_model.R")
source("mixtures_of_ergms_model.R") # function to perform EM.  
out<-fit.mix.egoergm(ego.terms,init,obs.S,G) # run the EM algorithm. 

# Pull the results from EM and store them
lambda<-out$lambda # store the mixture membership results of the EM algorithm
group.theta<-out$theta # store the parameter results of the EM algorithm.  Column is parameter, row is group
EE.BIC<-out$EE.BIC # store the BIC results of the EM algorithm

# Function to store lambda (intercept results) in a 3 by 1 matrix.
lambdaMatrix <- matrix(c(mean(lambda[,1]), mean(lambda[,2]), mean(lambda[,3])), nrow=3, ncol=1)
lambdaMatrix

# Create network lists by group
lambda_df <- as.data.frame(out$lambda)
colnames(lambda_df) <- c("Role_1", "Role_2", "Role_3")

lambda_df$role_assignment <- NA

for(i in 1:nrow(lambda_df)){
  role_assign <- which.max(out$lambda[i,])
  lambda_df$role_assignment[i] <- role_assign
}

g1_indices <- which(lambda_df$role_assignment == 1)
g2_indices <- which(lambda_df$role_assignment == 2)
g3_indices <- which(lambda_df$role_assignment == 3)

g1_nets <- x[g1_indices]
g2_nets <- x[g2_indices]
g3_nets <- x[g3_indices]

# code to plot the entire network using Fruchterman-Reingold layout with node colouration according to egoERGM results
par(mar=rep(0,4))

col.vec<-apply(lambda,1,which.max)
col.sides<-col.vec; col.sides[col.vec==1]<-3; col.sides[col.vec==2]<-4; col.sides[col.vec==3]<-5; col.sides[col.vec==4]<-50
col.sides[col.vec==5]<-6

pdf("Figure5.pdf")
coords<-plot(net,vertex.col=col.vec, layout.par=list(niter=1e3))
dev.off()

# Now, extract role assignments
roles <- col.vec

# So, these each correspond to the vertex name in the object
vertex <- net %v% 'vertex.names'
properName <- net %v% 'properName'
budget <- net %v% "budgetNew"
staff <- net %v% "staffNew" 
members <- net %v% "membersNew"
employees <- net %v% "currentEmployeesNew"
degree <- net %v% 'degreeCentrality'

rolesOut <- cbind(roles, vertex, properName, budget, staff, members, employees, degree)

#write.csv(rolesOut, "rolesOut_noMembers_Covars.csv")

rolesOut <- as.data.frame(rolesOut)
# get means by variable
role1 <- rolesOut[which(rolesOut$roles == 1),]
mean(as.numeric(role1$budget))
mean(as.numeric(role1$staff))
mean(as.numeric(role1$members))
mean(as.numeric(role1$degree))

role2 <- rolesOut[which(rolesOut$roles == 2),]
mean(as.numeric(role2$budget))
mean(as.numeric(role2$staff))
mean(as.numeric(role2$members))
mean(as.numeric(role2$degree))

# get edgelist
role3 <- rolesOut[which(rolesOut$roles == 3),]
mean(as.numeric(role3$budget))
mean(as.numeric(role3$staff))
mean(as.numeric(role3$members))
mean(as.numeric(role3$degree))


# write.csv(as.edgelist(net), "edgelist_noMembers.csv")

save.image("ModelPresented_noMembers.RData")

#######################
# Spec to budget
#######################
set.seed(1234)

# specify the model
ego.terms <- c("edges", 
               "concurrentties", 
               "nodecov('members')", 
               #"nodecov('budget')",
               "nodecov('staff')", 
               "nodecov('degreeCentrality')")

#################################

# Establish the offset term that adjusts for network size
ergm.offset <- rep(0,N)

# For every network, get an offset term to adjust for network size
for (i in 1:N)
  ergm.offset[i]<- -log(network.size(x[[i]]))

# Pull functions from script files
source("initialise_model.R")
cat(date(), "starting intitialisation\n")

# Specify ergm terms for each role - these are predictors for if an actor takes on a certain role.  
# This returns covariates for three groups
G <- 3
init<-init.egoergm(ego.terms, G) 
# init

cat(date(), "finished intitialisation\n")

# Specify the maximum number of iterations for the EM algorithm to go through
STEPS <- 500
cat(date(), "starting mixture model\n")

# Call the function to perform the EM
source("terms_ego_ergm_model.R")
source("mixtures_of_ergms_model.R") # function to perform EM.  
out<-fit.mix.egoergm(ego.terms,init,obs.S,G) # run the EM algorithm. 

# Pull the results from EM and store them
lambda<-out$lambda # store the mixture membership results of the EM algorithm
group.theta<-out$theta # store the parameter results of the EM algorithm.  Column is parameter, row is group
EE.BIC<-out$EE.BIC # store the BIC results of the EM algorithm

# Function to store lambda (intercept results) in a 3 by 1 matrix.
lambdaMatrix <- matrix(c(mean(lambda[,1]), mean(lambda[,2]), mean(lambda[,3])), nrow=3, ncol=1)
lambdaMatrix



# Create network lists by group
lambda_df <- as.data.frame(out$lambda)
colnames(lambda_df) <- c("Role_1", "Role_2", "Role_3")

lambda_df$role_assignment <- NA

for(i in 1:nrow(lambda_df)){
  role_assign <- which.max(out$lambda[i,])
  lambda_df$role_assignment[i] <- role_assign
}

g1_indices <- which(lambda_df$role_assignment == 1)
g2_indices <- which(lambda_df$role_assignment == 2)
g3_indices <- which(lambda_df$role_assignment == 3)

g1_nets <- x[g1_indices]
g2_nets <- x[g2_indices]
g3_nets <- x[g3_indices]

# code to plot the entire network using Fruchterman-Reingold layout with node colouration according to egoERGM results
par(mar=rep(0,4))

col.vec<-apply(lambda,1,which.max)
col.sides<-col.vec; col.sides[col.vec==1]<-3; col.sides[col.vec==2]<-4; col.sides[col.vec==3]<-5; col.sides[col.vec==4]<-50
col.sides[col.vec==5]<-6

pdf("Figure6.pdf")
coords<-plot(net,vertex.col=col.vec, layout.par=list(niter=1e3))
dev.off()

# Now, extract role assignments
roles <- col.vec

# So, these each correspond to the vertex name in the object
vertex <- net %v% 'vertex.names'
properName <- net %v% 'properName'
budget <- net %v% "budgetNew"
staff <- net %v% "staffNew" 
members <- net %v% "membersNew"
employees <- net %v% "currentEmployeesNew"
degree <- net %v% 'degreeCentrality'

rolesOut <- cbind(roles, vertex, properName, budget, staff, members, employees, degree)

#write.csv(rolesOut, "rolesOut_noBudget_Covars.csv")

rolesOut <- as.data.frame(rolesOut)
# get means by variable
role1 <- rolesOut[which(rolesOut$roles == 1),]
mean(as.numeric(role1$budget))
mean(as.numeric(role1$staff))
mean(as.numeric(role1$members))
mean(as.numeric(role1$degree))

role2 <- rolesOut[which(rolesOut$roles == 2),]
mean(as.numeric(role2$budget))
mean(as.numeric(role2$staff))
mean(as.numeric(role2$members))
mean(as.numeric(role2$degree))

# get edgelist
role3 <- rolesOut[which(rolesOut$roles == 3),]
mean(as.numeric(role3$budget))
mean(as.numeric(role3$staff))
mean(as.numeric(role3$members))
mean(as.numeric(role3$degree))


# write.csv(as.edgelist(net), "edgelist_noBudget.csv")

save.image("ModelPresented_noBudget.RData")

#######################
# Spec to Staff
#######################
set.seed(1234)

# specify the model
ego.terms <- c("edges", 
               "concurrentties", 
               "nodecov('members')", 
               "nodecov('budget')",
               #"nodecov('staff')", 
               "nodecov('degreeCentrality')")

# Establish the offset term that adjusts for network size
ergm.offset <- rep(0,N)

# For every network, get an offset term to adjust for network size
for (i in 1:N)
  ergm.offset[i]<- -log(network.size(x[[i]]))

# Pull functions from script files
source("initialise_model.R")
cat(date(), "starting intitialisation\n")

# Specify ergm terms for each role - these are predictors for if an actor takes on a certain role.  
# This returns covariates for three groups
G <- 3
init<-init.egoergm(ego.terms, G) 
# init

cat(date(), "finished intitialisation\n")

# Specify the maximum number of iterations for the EM algorithm to go through
STEPS <- 500
cat(date(), "starting mixture model\n")

# Call the function to perform the EM
source("terms_ego_ergm_model.R")
source("mixtures_of_ergms_model.R") # function to perform EM.  
out<-fit.mix.egoergm(ego.terms,init,obs.S,G) # run the EM algorithm. 

# Pull the results from EM and store them
lambda<-out$lambda # store the mixture membership results of the EM algorithm
group.theta<-out$theta # store the parameter results of the EM algorithm.  Column is parameter, row is group
EE.BIC<-out$EE.BIC # store the BIC results of the EM algorithm

# Function to store lambda (intercept results) in a 3 by 1 matrix.
lambdaMatrix <- matrix(c(mean(lambda[,1]), mean(lambda[,2]), mean(lambda[,3])), nrow=3, ncol=1)
lambdaMatrix

# Create network lists by group
lambda_df <- as.data.frame(out$lambda)
colnames(lambda_df) <- c("Role_1", "Role_2", "Role_3")

lambda_df$role_assignment <- NA

for(i in 1:nrow(lambda_df)){
  role_assign <- which.max(out$lambda[i,])
  lambda_df$role_assignment[i] <- role_assign
}

g1_indices <- which(lambda_df$role_assignment == 1)
g2_indices <- which(lambda_df$role_assignment == 2)
g3_indices <- which(lambda_df$role_assignment == 3)

g1_nets <- x[g1_indices]
g2_nets <- x[g2_indices]
g3_nets <- x[g3_indices]

# code to plot the entire network using Fruchterman-Reingold layout with node colouration according to egoERGM results
par(mar=rep(0,4))

col.vec<-apply(lambda,1,which.max)
col.sides<-col.vec; col.sides[col.vec==1]<-3; col.sides[col.vec==2]<-4; col.sides[col.vec==3]<-5; col.sides[col.vec==4]<-50
col.sides[col.vec==5]<-6

pdf("Figure7.pdf")
coords<-plot(net,vertex.col=col.vec, layout.par=list(niter=1e3))
dev.off()

# Now, extract role assignments
roles <- col.vec

# So, these each correspond to the vertex name in the object
vertex <- net %v% 'vertex.names'
properName <- net %v% 'properName'
budget <- net %v% "budgetNew"
staff <- net %v% "staffNew" 
members <- net %v% "membersNew"
employees <- net %v% "currentEmployeesNew"
degree <- net %v% 'degreeCentrality'

rolesOut <- cbind(roles, vertex, properName, budget, staff, members, employees, degree)

#write.csv(rolesOut, "rolesOut_noStaff_Covars.csv")

rolesOut <- as.data.frame(rolesOut)
# get means by variable
role1 <- rolesOut[which(rolesOut$roles == 1),]
mean(as.numeric(role1$budget))
mean(as.numeric(role1$staff))
mean(as.numeric(role1$members))
mean(as.numeric(role1$degree))

role2 <- rolesOut[which(rolesOut$roles == 2),]
mean(as.numeric(role2$budget))
mean(as.numeric(role2$staff))
mean(as.numeric(role2$members))
mean(as.numeric(role2$degree))

# get edgelist
role3 <- rolesOut[which(rolesOut$roles == 3),]
mean(as.numeric(role3$budget))
mean(as.numeric(role3$staff))
mean(as.numeric(role3$members))
mean(as.numeric(role3$degree))


# write.csv(as.edgelist(net), "edgelist_noStaff.csv")

save.image("ModelPresented_noStaff.RData")

#######################
# Spec to Degree
#######################
set.seed(1234)

# specify the model
ego.terms <- c("edges", 
               "concurrentties", 
               "nodecov('members')", 
               "nodecov('budget')",
               "nodecov('staff')" 
               #  "nodecov('degreeCentrality')"
)

#################################

# Establish the offset term that adjusts for network size
ergm.offset <- rep(0,N)

# For every network, get an offset term to adjust for network size
for (i in 1:N)
  ergm.offset[i]<- -log(network.size(x[[i]]))

# Pull functions from script files
source("initialise_model.R")
cat(date(), "starting intitialisation\n")

# Specify ergm terms for each role - these are predictors for if an actor takes on a certain role.  
# This returns covariates for three groups
G <- 3
init<-init.egoergm(ego.terms, G) 
# init

cat(date(), "finished intitialisation\n")

# Specify the maximum number of iterations for the EM algorithm to go through
STEPS <- 500
cat(date(), "starting mixture model\n")

# Call the function to perform the EM
source("terms_ego_ergm_model.R")
source("mixtures_of_ergms_model.R") # function to perform EM.  
out<-fit.mix.egoergm(ego.terms,init,obs.S,G) # run the EM algorithm. 

# Pull the results from EM and store them
lambda<-out$lambda # store the mixture membership results of the EM algorithm
group.theta<-out$theta # store the parameter results of the EM algorithm.  Column is parameter, row is group
EE.BIC<-out$EE.BIC # store the BIC results of the EM algorithm

# Function to store lambda (intercept results) in a 3 by 1 matrix.
lambdaMatrix <- matrix(c(mean(lambda[,1]), mean(lambda[,2]), mean(lambda[,3])), nrow=3, ncol=1)
lambdaMatrix

# Create network lists by group
lambda_df <- as.data.frame(out$lambda)
colnames(lambda_df) <- c("Role_1", "Role_2", "Role_3")

lambda_df$role_assignment <- NA

for(i in 1:nrow(lambda_df)){
  role_assign <- which.max(out$lambda[i,])
  lambda_df$role_assignment[i] <- role_assign
}

g1_indices <- which(lambda_df$role_assignment == 1)
g2_indices <- which(lambda_df$role_assignment == 2)
g3_indices <- which(lambda_df$role_assignment == 3)

g1_nets <- x[g1_indices]
g2_nets <- x[g2_indices]
g3_nets <- x[g3_indices]

# code to plot the entire network using Fruchterman-Reingold layout with node colouration according to egoERGM results
par(mar=rep(0,4))

col.vec<-apply(lambda,1,which.max)
col.sides<-col.vec; col.sides[col.vec==1]<-3; col.sides[col.vec==2]<-4; col.sides[col.vec==3]<-5; col.sides[col.vec==4]<-50
col.sides[col.vec==5]<-6

pdf("Figure8.pdf")
coords<-plot(net,vertex.col=col.vec, layout.par=list(niter=1e3))
dev.off()

# Now, extract role assignments
roles <- col.vec

# So, these each correspond to the vertex name in the object
vertex <- net %v% 'vertex.names'
properName <- net %v% 'properName'
budget <- net %v% "budgetNew"
staff <- net %v% "staffNew" 
members <- net %v% "membersNew"
employees <- net %v% "currentEmployeesNew"
degree <- net %v% 'degreeCentrality'

rolesOut <- cbind(roles, vertex, properName, budget, staff, members, employees, degree)

#write.csv(rolesOut, "rolesOut_noDegree_Covars.csv")

rolesOut <- as.data.frame(rolesOut)
# get means by variable
role1 <- rolesOut[which(rolesOut$roles == 1),]
mean(as.numeric(role1$budget))
mean(as.numeric(role1$staff))
mean(as.numeric(role1$members))
mean(as.numeric(role1$degree))

role2 <- rolesOut[which(rolesOut$roles == 2),]
mean(as.numeric(role2$budget))
mean(as.numeric(role2$staff))
mean(as.numeric(role2$members))
mean(as.numeric(role2$degree))

# get edgelist
role3 <- rolesOut[which(rolesOut$roles == 3),]
mean(as.numeric(role3$budget))
mean(as.numeric(role3$staff))
mean(as.numeric(role3$members))
mean(as.numeric(role3$degree))


# write.csv(as.edgelist(net), "edgelist_noDegree.csv")

save.image("ModelPresented_noDegree.RData")


#################
# Compare These
#################
load("ModelPresented_noCT.RData")
noCT_mat <- lambdaMatrix
noCT_plot <-plot(net,vertex.col=col.vec, layout.par=list(niter=1e3))

load("ModelPresented_noMembers.RData")
noMem_mat <- lambdaMatrix
noMem_plot <-plot(net,vertex.col=col.vec, layout.par=list(niter=1e3))


load("ModelPresented_noBudget.RData")
noBud_mat <- lambdaMatrix
noBud_plot <-plot(net,vertex.col=col.vec, layout.par=list(niter=1e3))

load("ModelPresented_noStaff.RData")
noSta_mat <- lambdaMatrix
noSta_plot <-plot(net,vertex.col=col.vec, layout.par=list(niter=1e3))

load("ModelPresented_noDegree.RData")
noDeg_mat <- lambdaMatrix
noDeg_plot <-plot(net,vertex.col=col.vec, layout.par=list(niter=1e3))

load("ModelPresented.RData")
lambdaMatrix
plot <-plot(net,vertex.col=col.vec, layout.par=list(niter=1e3))

# # deg matters (removal leads to some minor changes), sta removal (two blocks), bud (removal leads to fewer coordinators), mem (matters, removal leads to two blocks), CT doesn't matter
# reverse because number assignments differ for some
gof_mat <- cbind(lambdaMatrix, rev(noCT_mat), noMem_mat, noBud_mat, noSta_mat, rev(noDeg_mat))

gof_mat <- t(round(gof_mat, digits = 3))

rownames(gof_mat) <- c("Baseline", "No Concurrent Ties", "No Members", "No Budget", "No Staff", "No Degree")
colnames(gof_mat) <- c("Role 1", "Role 2", "Role 3")
table_3<-gof_mat
library(xtable)
xtable(table_3)
# Produce Table 3
sink("Table3.txt")
table_3
sink()

save.image("BCCN_GOF.RData")

###########################################################################
# Descriptives
###########################################################################

load("ac-net-env-case.RData") # imports a network object "net"

net_original <- net

library(statnet)
deg <- degree(net_original)
sink("OriginalNetworkDescriptions.txt")
cat("Original Network Size")
network.size(net_original)
cat("Original Network Edgecount")
network.edgecount(net_original)
cat("Original Network Density")
network.density(net_original)
cat("Original Clustering Coefficient")
triad.census(net_original, mode="graph")
cat("Original Isolates")
length(isolates(net_original))
cat("Original Degree Distribution")
summary(deg)
sink()

d_plot <- ggplot(data = melt(deg), aes(melt(deg)$value)) +
  geom_histogram(alpha = 0.75,
                 aes(fill=..count..),
                 binwidth = 10) +
  theme_bw() +
  labs(x="Degree", y="Count") +
  scale_fill_gradient("Count", low = "springgreen1", high = "springgreen4") +
  geom_vline(xintercept = 31.78, linetype = "longdash", colour = "grey40") 
d_plot

pdf("Figure1A.pdf")
d_plot
dev.off()



library(reshape2)
library(ggplot2)
net_geo <- melt(geodist(net)$gdist)
net_geo <- net_geo[net_geo$value != 0, ]
#net_geo$value <- ifelse(is.infinite(net_geo$value) == TRUE, 0, net_geo$value) 

table(is.infinite(net_geo$value))

mean_geo <- mean(net_geo[is.infinite(net_geo$value) == FALSE,]$value)

g_plot <- ggplot(data = net_geo, aes(net_geo$value)) +
  geom_histogram(alpha = 0.75,
                 aes(fill=..count..),
                 binwidth = 1) +
  theme_bw() +
  labs(x="Geodesic Distance", y="Count") +
  scale_fill_gradient("Count", low = "springgreen1", high = "springgreen4")


pdf("Figure1B.pdf")
g_plot
dev.off()



